#!/bin/sh
# aside from this initial boilerplate, this is actually -*- scheme -*- code
main='(module-ref (resolve-module '\''(scripts read-scheme-source)) '\'main')'
exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@"
!#
;;; read-scheme-source --- Read a file, recognizing scheme forms and comments

;; 	Copyright (C) 2001, 2006 Free Software Foundation, Inc.
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this software; see the file COPYING.  If not, write to
;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301 USA

;;; Author: Thien-Thi Nguyen

;;; Commentary:

;; Usage: read-scheme-source FILE1 FILE2 ...
;;
;; This program parses each FILE and writes to stdout sexps that describe the
;; top-level structures of the file: scheme forms, single-line comments, and
;; hash-bang comments.  You can further process these (to associate comments
;; w/ scheme forms as a kind of documentation, for example).
;;
;; The output sexps have one of these forms:
;;
;;    (quote (filename FILENAME))
;;
;;    (quote (comment :leading-semicolons N
;;                    :text LINE))
;;
;;    (quote (whitespace :text LINE))
;;
;;    (quote (hash-bang-comment :line LINUM
;;                              :line-count N
;;                              :text-list (LINE1 LINE2 ...)))
;;
;;    (quote (following-form-properties :line LINUM
;;                                      :line-count N)
;;                                      :type TYPE
;;                                      :signature SIGNATURE
;;                                      :std-int-doc DOCSTRING))
;;
;;    SEXP
;;
;; The first four are straightforward (both FILENAME and LINE are strings sans
;; newline, while LINUM and N are integers).  The last two always go together,
;; in that order.  SEXP is scheme code processed only by `read' and then
;; `write'.
;;
;; The :type field may be omitted if the form is not recognized.  Otherwise,
;; TYPE may be one of: procedure, alias, define-module, variable.
;;
;; The :signature field may be omitted if the form is not a procedure.
;; Otherwise, SIGNATURE is a list showing the procedure's signature.
;;
;; If the type is `procedure' and the form has a standard internal docstring
;; (first body form a string), that is extracted in full -- including any
;; embedded newlines -- and recorded by field :std-int-doc.
;;
;;
;; Usage from a program: The output list of sexps can be retrieved by scheme
;; programs w/o having to capture stdout, like so:
;;
;;    (use-modules (scripts read-scheme-source))
;;    (define source-forms (read-scheme-source-silently "FILE1" "FILE2" ...))
;;
;; There are also two convenience procs exported for use by Scheme programs:
;;
;; (clump FORMS) --- filter FORMS combining contiguous comment forms that
;;                   have the same number of leading semicolons.
;;
;; (quoted? SYM FORM) --- see if FORM looks like: "(quote (SYM ...))", parse
;;                        the ":tags", and return alist of (TAG . VAL) elems.
;;
;; TODO: Add option "--clump-comments", maybe w/ different clumping styles.
;;       Make `annotate!' extensible.

;;; Code:

(define-module (scripts read-scheme-source)
  :use-module (ice-9 rdelim)
  :export (read-scheme-source
           read-scheme-source-silently
           quoted?
           clump))

;; Try to figure out what FORM is and its various attributes.
;; Call proc NOTE! with key (a symbol) and value.
;;
(define (annotate! form note!)
  (cond ((and (list? form)
              (< 2 (length form))
              (eq? 'define (car form))
              (pair? (cadr form))
              (symbol? (caadr form)))
         (note! ':type 'procedure)
         (note! ':signature (cadr form))
         (and (< 3 (length form))
              (string? (caddr form))
              (note! ':std-int-doc (caddr form))))
        ((and (list? form)
              (< 2 (length form))
              (eq? 'define (car form))
              (symbol? (cadr form))
              (list? (caddr form))
              (< 3 (length (caddr form)))
              (eq? 'lambda (car (caddr form)))
              (string? (caddr (caddr form))))
         (note! ':type 'procedure)
         (note! ':signature (cons (cadr form) (cadr (caddr form))))
         (note! ':std-int-doc (caddr (caddr form))))
        ((and (list? form)
              (= 3 (length form))
              (eq? 'define (car form))
              (symbol? (cadr form))
              (symbol? (caddr form)))
         (note! ':type 'alias))
        ((and (list? form)
              (eq? 'define-module (car form)))
         (note! ':type 'define-module))
        ;; Add other types here.
        (else (note! ':type 'variable))))

;; Process FILE, calling NB! on parsed top-level elements.
;; Recognized: #!-!# and regular comments in addition to normal forms.
;;
(define (process file nb!)
  (nb! `'(filename ,file))
  (let ((hash-bang-rx (make-regexp "^#!"))
        (bang-hash-rx (make-regexp "^!#"))
        (all-comment-rx (make-regexp "^[ \t]*(;+)"))
        (all-whitespace-rx (make-regexp "^[ \t]*$"))
        (p (open-input-file file)))
    (let loop ((n (1+ (port-line p))) (line (read-line p)))
      (or (not n)
          (eof-object? line)
          (begin
            (cond ((regexp-exec hash-bang-rx line)
                   (let loop ((line (read-line p))
                              (text (list line)))
                     (if (or (eof-object? line)
                             (regexp-exec bang-hash-rx line))
                         (nb! `'(hash-bang-comment
                                 :line ,n
                                 :line-count ,(1+ (length text))
                                 :text-list ,(reverse
                                              (cons line text))))
                         (loop (read-line p)
                               (cons line text)))))
                  ((regexp-exec all-whitespace-rx line)
                   (nb! `'(whitespace :text ,line)))
                  ((regexp-exec all-comment-rx line)
                   => (lambda (m)
                        (nb! `'(comment
                                :leading-semicolons
                                ,(let ((m1 (vector-ref m 1)))
                                   (- (cdr m1) (car m1)))
                                :text ,line))))
                  (else
                   (unread-string line p)
                   (let* ((form (read p))
                          (count (- (port-line p) n))
                          (props (let* ((props '())
                                        (prop+ (lambda args
                                                 (set! props
                                                       (append props args)))))
                                   (annotate! form prop+)
                                   props)))
                     (or (= count 1)    ; ugh
                         (begin
                           (read-line p)
                           (set! count (1+ count))))
                     (nb! `'(following-form-properties
                             :line ,n
                             :line-count ,count
                             ,@props))
                     (nb! form))))
            (loop (1+ (port-line p)) (read-line p)))))))

;;; entry points

(define (read-scheme-source-silently . files)
  "See commentary in module (scripts read-scheme-source)."
  (let* ((res '()))
    (for-each (lambda (file)
                (process file (lambda (e) (set! res (cons e res)))))
              files)
    (reverse res)))

(define (read-scheme-source . files)
  "See commentary in module (scripts read-scheme-source)."
  (for-each (lambda (file)
              (process file (lambda (e) (write e) (newline))))
            files))

;; Recognize:          (quote (SYM :TAG1 VAL1 :TAG2 VAL2 ...))
;; and return alist:   ((TAG1 . VAL1) (TAG2 . VAL2) ...)
;; where the tags are symbols.
;;
(define (quoted? sym form)
  (and (list? form)
       (= 2 (length form))
       (eq? 'quote (car form))
       (let ((inside (cadr form)))
         (and (list? inside)
              (< 0 (length inside))
              (eq? sym (car inside))
              (let loop ((ls (cdr inside)) (alist '()))
                (if (null? ls)
                    alist               ; retval
                    (let ((first (car ls)))
                      (or (symbol? first)
                          (error "bad list!"))
                      (loop (cddr ls)
                            (acons (string->symbol
                                    (substring (symbol->string first) 1))
                                   (cadr ls)
                                   alist)))))))))

;; Filter FORMS, combining contiguous comment forms that have the same number
;; of leading semicolons.  Do not include in them whitespace lines.
;; Whitespace lines outside of such comment groupings are ignored, as are
;; hash-bang comments.  All other forms are passed through unchanged.
;;
(define (clump forms)
  (let loop ((forms forms) (acc '()) (pass-this-one-through? #f))
    (if (null? forms)
        (reverse acc)                   ; retval
        (let ((form (car forms)))
          (cond (pass-this-one-through?
                 (loop (cdr forms) (cons form acc) #f))
                ((quoted? 'following-form-properties form)
                 (loop (cdr forms) (cons form acc) #t))
                ((quoted? 'whitespace form)             ;;; ignore
                 (loop (cdr forms) acc #f))
                ((quoted? 'hash-bang-comment form)      ;;; ignore for now
                 (loop (cdr forms) acc #f))
                ((quoted? 'comment form)
                 => (lambda (alist)
                      (let cloop ((inner-forms (cdr forms))
                                  (level (assq-ref alist 'leading-semicolons))
                                  (text (list (assq-ref alist 'text))))
                        (let ((up (lambda ()
                                    (loop inner-forms
                                          (cons (cons level (reverse text))
                                                acc)
                                          #f))))
                          (if (null? inner-forms)
                              (up)
                              (let ((inner-form (car inner-forms)))
                                (cond ((quoted? 'comment inner-form)
                                       => (lambda (inner-alist)
                                            (let ((new-level
                                                   (assq-ref
                                                    inner-alist
                                                    'leading-semicolons)))
                                              (if (= new-level level)
                                                  (cloop (cdr inner-forms)
                                                         level
                                                         (cons (assq-ref
                                                                inner-alist
                                                                'text)
                                                               text))
                                                  (up)))))
                                      (else (up)))))))))
                (else (loop (cdr forms) (cons form acc) #f)))))))

;;; script entry point

(define main read-scheme-source)

;;; read-scheme-source ends here
